from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import sqlite3
import numpy as np
import pandas as pd
import seaborn as sns
# matplotlibのグラフをRetinaの高解像度で表示する
%config InlineBackend.figure_formats = {'png', 'retina'}
# Jupyter Notebookの中で作図した画像を表示させる
%matplotlib inline
# matplotlib をインポートする
import matplotlib.pyplot as plt
# 図のサイズを12inch x 12inch = 864px X 864px にする
plt.rcParams['figure.figsize'] = 16, 9
# 日本語タイトルのため、japanizeをインポートする
import japanize_matplotlib
plt.rcParams['font.family'] = 'IPAexGothic'
pwd
'C:\\Users\\tkjyoke\\pythonProject\\sowi_com'
cd r361
[WinError 2] 指定されたファイルが見つかりません。: 'r361' C:\Users\tkjyoke\pythonProject\sowi_com
cd D:
D:\
cd jupyter
D:\jupyter
ls
ドライブ D のボリューム ラベルは ボリューム です
ボリューム シリアル番号は C64A-827C です
D:\jupyter のディレクトリ
2023/04/12 16:42 <DIR> .
2023/04/12 16:42 <DIR> ..
2022/12/27 16:26 <DIR> .ipynb_checkpoints
2022/10/07 13:12 60,342 【Master_to_USE】https_log_read_04_28_glob_used.ipynb
2022/12/01 14:12 33,357 【ローデータ1125】1201.xlsx
2022/12/12 16:22 44,539 【ローデータ1206】1212.xlsx
2022/12/12 16:23 244,750 【視聴データ1206】1212.pdf
2023/01/06 13:01 28,400 03-03-merging.ipynb
2022/12/01 14:36 107,260 11_25視聴分析.pptx
2022/11/10 12:05 66,631 1101_view_statics.pptx
2022/11/10 11:27 62,678 1101ローデータ(1104).xlsx
2022/12/13 08:59 186,467 12_06_11_25視聴分析.pptx
2022/04/14 16:42 37,549 132_jirei_addr_non_dup_04_14_r4.ipynb
2022/04/14 15:03 220,752 2E546500
2022/03/08 12:52 370,631 46729840
2022/05/06 16:40 57,584 8FC75E20
2022/02/15 13:33 177,571 ad_pl_qa_02_14_todays_final.ipynb
2022/02/15 15:49 300,404 ad_pl_qa_02_15_final.ipynb
2022/02/21 10:23 594,508 ad_pl_qa_02_16_fina[.ipynb
2022/02/24 11:41 695,928 ad_pl_qa_02_22_final_v4-Copy1.ipynb
2022/02/24 16:44 225,352 ad_pl_qa_02_22_final_v5-Copy1.ipynb
2022/02/28 15:45 106,290 ad_pl_qa_02_28_final_v10_for_list.ipynb
2022/02/28 13:02 2,057,652 ad_pl_qa_02_28_noon_v8.ipynb
2022/03/02 15:23 1,836,188 ad_pl_qa_03_02_noon_v14.ipynb
2022/03/02 12:00 291,971 ad_pl_qa_03_02_noon_v14-Copy1.ipynb
2022/03/03 10:53 2,298,282 ad_pl_qa_03_03_w_ct_v2_bk.ipynb
2022/03/08 08:38 370,738 AD_PL_QA_03_08_203ans_final.xlsx
2022/03/16 15:07 226,818 ad_pl_qa_03_16_cross_table_v4_203ans-backup.ipynb
2022/03/18 12:07 3,014,582 ad_pl_qa_03_17_203ans_multiple_answer_ct_new_elder_rate_v1.ipynb
2022/04/05 15:11 3,322,798 ad_pl_qa_04_05_r4_cross_table_v6_196ans_wo_dup_new_elder_rate.ipynb
2022/03/01 10:39 5,344 adpl_ind.xlsx
2022/04/20 13:35 472,442 ADPL_q37_372_KHCoder.pptx
2022/04/05 15:10 166,968 adpl_qa_ans_row_196_by_51_apr_05.xlsx
2022/03/16 14:30 2,176,895 adpl_qa_final_2.sav
2022/03/08 11:21 1,040,097 adpl_qa_summary_1.pptx
2022/03/10 16:31 2,635,343 adpl_qa_summary_cross_table.pptx
2022/03/10 16:31 2,635,342 adpl_qa_summary_cross_table_v1.pptx
2022/03/16 16:34 2,597,233 adpl_qa_summary_cross_table_v2_03_16.pptx
2022/03/17 10:19 2,555,744 adpl_qa_summary_cross_table_v3_03_17.pptx
2022/03/18 11:09 2,863,009 adpl_qa_summary_cross_table_v3_03_18.pptx
2022/03/01 10:39 5,278 adpl_year.xlsx
2021/01/06 09:53 34,391,374 AI_pdf.pdf
2022/10/12 09:27 30,904 AttendeeReport (1).xlsx
2022/10/17 11:48 27,088 AttendeeReport (2).xlsx
2022/02/04 16:31 790,521 california-housing-prices-regression-with-xgboost.ipynb
2022/02/04 16:36 790,521 california-housing-prices-regression-with-xgboost_02_04_r4.ipynb
2022/02/14 13:54 8,200 cols.xlsx
2022/12/16 10:36 20,976 create_sql_lookup_for_contest_winner2022_2015.ipynb
2022/03/31 14:33 4,885 cross_table_chi2_test.ipynb
2023/03/22 16:45 23,005 cross_table_visualization.xlsx
2023/03/22 14:56 2,162 ct4.csv
2022/05/06 13:37 50,110 D150BD20.xlsx
2022/04/14 15:02 220,769 DB636500
2022/02/14 13:25 60,423 df.xlsx
2022/02/22 11:44 147,344 df_all_q29_OHE.xlsx
2022/03/01 10:39 17,119 df_q11_OHE.xlsx
2022/03/01 10:39 348,793 dfall_ans.xlsx
2023/04/06 09:08 4,494,149 Downloads (6).zip
2022/03/25 14:44 7,593 dummy_df.xlsx
2022/03/25 15:09 10,742 dummy2_df.xlsx
2023/04/07 09:33 29,263 E566E640
2022/05/06 15:36 57,592 elder.xlsx
2022/05/06 16:24 19,652 elder_企業名_cleaning_to_input_sqllite.ipynb
2022/05/06 16:29 44,121 elder_企業名_cleaning_to_input_sqllite-Copy1.ipynb
2022/05/02 16:21 50,151 elder_掲載企業法人名_0502_v2.xlsx
2022/05/06 14:14 59,676 elder_掲載企業法人名_0506_v3.xlsx
2023/04/12 16:40 115,430 employment.png
2023/04/07 10:16 739,358 employment.svg
2023/04/12 16:39 116,832 employment_df1.png
2023/04/12 16:39 115,430 employment_df2.png
2023/04/10 14:46 28,639 employment_mean_min_max.svg
2023/04/12 14:55 400,521 employment1.png
2023/04/07 10:37 740,255 employment1.svg
2023/04/12 13:47 397,750 employment2.png
2023/04/07 10:37 739,358 employment2.svg
2023/04/12 14:56 384,673 employment3.png
2022/11/10 11:53 9,272 Excel.xlsx
2022/11/10 11:55 9,662 Excel2.xlsx
2022/03/08 08:50 1,101,727 final_アドバイザープランナーアンケートR4_03_08_r4.pdf
2022/03/25 16:33 13,443 google_form1.PNG
2022/03/25 16:04 12,284 google_forms.PNG
2019/09/22 01:36 1,423,529 housing.csv
2022/02/03 11:05 409,382 housing.csv.zip
2022/02/02 14:32 366,590 housing_price_xgboost_01_26_v1.ipynb
2022/02/02 14:33 366,590 housing_price_xgboost_02_02_v1.ipynb
2022/01/31 12:59 1,989,479 housing0131_v4.csv
2022/02/02 14:29 1,969,954 housing0202_v5.csv
2022/01/26 12:02 2,028,521 housing2.csv
2022/01/26 15:36 1,989,479 housing3.csv
2022/02/03 12:54 1,711,232 housing4.csv
2022/02/02 15:59 329 housing-main.zip
2022/02/07 16:36 249,856 hr_01.db
2022/08/26 15:17 <DIR> hr_db
2022/05/26 16:20 225,280 HR_Employee_Attrition.db
2022/07/15 16:22 38,607 hr_test.csv
2023/03/23 15:42 130,157 img_hr.png
2022/03/17 16:17 14,789 join_list_revisit_study.ipynb
2021/12/28 14:57 3,122 list_naiho_expression_must_understand.ipynb
2022/11/17 16:43 26,030 mailaddr_132_wo_dup.xlsx
2022/12/26 13:43 36,944 matome_pandas_df_mamupulating.ipynb
2022/03/16 14:32 32,888 mecab_full_explained_01_11_2022.ipynb
2022/03/14 08:52 53,174 mecab_test_12_21-Copy1.ipynb
2022/01/11 14:27 168,374 mecab_test_12_27.ipynb
2023/01/12 16:03 <DIR> moushikomi_r4
2022/03/25 16:15 3,307,304 multiple_answer_shuukei.ipynb
2022/12/14 14:15 45,688 MUST_Save_mouhikomi_data_all_r4.ipynb
2022/10/04 13:19 459,637 outlook_to_eml_file_conversion_101.pptx
2021/06/11 02:24 1,397,547 property_values_score.sav
2022/02/15 16:48 13,521 q21_OHE.xlsx
2022/03/01 10:39 6,026 q29_ans_list.xlsx
2022/02/22 11:37 147,344 q29_OHE.xlsx
2022/02/24 09:28 1,063,385 r4_02_24_171ans.pdf
2022/03/01 16:39 <DIR> r4_03_01
2022/03/03 09:03 <DIR> r4_03_03
2022/03/18 11:43 <DIR> r4_03_14
2022/03/14 14:10 707,752 r4_03_14.zip
2022/03/18 13:42 <DIR> r4_03_18
2022/12/16 10:02 1,540,539 read_csv_df_filter_concat_rename_truncate_value_count_df_pie_barplot.ipynb
2022/12/19 15:38 713,521 read_r3_61_245447by137_sns_plot_v1.ipynb
2022/12/27 09:01 163,313 read_r3_61_sql_select_here_document_12_26_R4.ipynb
2022/12/27 14:50 240,882 read_r3_61_突合_contest_winner_243_1227_2022.ipynb
2022/12/27 16:23 218,010 read_shokulab_突合_contest_winner_243_1227_2022.ipynb
2022/12/26 14:33 294,993 read_write_sql_excelz_csv_cols_divide_cols_matching_rename_cols_SQL_SELECT.ipynb
2022/04/05 15:58 7,383,672 ReliefJetEssentialsOutlook.exe
2022/10/06 12:09 <DIR> renamed_dir
2022/11/29 14:04 36,147 SAVE_Final_teams_AttendeeReport_sumarize_1101_tokyo.ipynb
2022/12/01 14:29 305,869 SAVE_Final_teams_AttendeeReport_sumarize_1125_tokyo.ipynb
2022/12/08 16:02 180,021 SAVE_Final_teams_AttendeeReport_sumarize_1125_tokyo-Copy1.ipynb
2022/12/13 08:55 316,488 SAVE_Final_teams_AttendeeReport_sumarize_1206.ipynb
2022/12/01 11:58 58,001 SAVE_FOR_USE_132_MAIL_CREATION_Dec_01_r4_no_yakushoku.ipynb
2022/04/12 13:31 20,418 SAVE_FOR_USE_EACH_MAIL_CREATION_APR_05_V1.ipynb
2022/11/09 13:55 14,744 SAVE_FOR_USE_EACH_MAIL_CREATION_NOV_08_v3_yakushoku.ipynb
2022/10/07 10:26 15,601 SAVE_FOR_USE_EACH_MAIL_CREATION_OCT_04_v1.ipynb
2022/11/08 13:20 15,711 SAVE_FOR_USE_EACH_MAIL_CREATION_OCT_04_v2_yakushoku.ipynb
2022/10/20 14:13 373,425 SAVE_teams_AttendeeReport_sumarize_1017_v3.ipynb
2022/03/01 10:39 5,126 sogyo_shien.xlsx
2022/03/30 16:56 2,327 spss_cross_basic.sav
2022/03/30 14:40 12,564 spss_cross_basic.xlsx
2022/03/30 16:56 691 spss_cross_basic2.sav
2022/03/30 14:42 8,441 spss_cross_basic2.xlsx
2022/03/08 16:46 1,993,020 spss_mecab_memo_0307_v8_2022.pptx
2022/03/10 13:01 2,155,416 spss_mecab_memo_0310_v9_2022.pptx
2022/03/25 16:44 2,368,118 spss_mecab_memo_0316_v10_2022.pptx
2022/02/07 15:11 120,511 sql_01_20_r4.ipynb
2023/03/08 11:10 <DIR> sqliteDB
2022/01/24 13:11 <DIR> T_SHOKUBA_20220119
2022/01/21 16:49 23,797,025 T_SHOKUBA_20220119.zip
2022/03/01 10:39 5,703 tdfk.xlsx
2022/03/03 13:16 524 tdfk_ID.txt
2022/10/14 16:46 136,875 teams_AttendeeReport_manipulate_1012_2022_v1.ipynb
2022/10/17 10:35 347,053 teams_AttendeeReport_manipulate_1017_2022_v1 (3).ipynb
2021/06/11 02:24 121,115 telco.sav
2021/06/11 02:24 62,928 telco_Feb.sav
2021/06/11 02:24 62,320 telco_Jan.sav
2021/11/02 16:12 0 test.text
2022/05/06 16:02 57,477 test2.xlsx
2022/03/10 11:02 87,361 value_counts_visualization_03_10.ipynb
2022/08/26 15:16 227,974 WA_Fn-UseC_-HR-Employee-Attrition.csv
2022/04/21 16:34 632,372 wordcloud_04_19_2022_v1.pptx
2022/01/21 15:41 2,028,526 xgb.csv
2022/04/14 14:57 220,815 マスターリスト_132_wo_dup_url_w_61_w_ID_04_14_2022_v25.xlsm
2023/03/31 15:21 <DIR> ローデータ
2022/03/18 11:32 3,456,179 活動支援ツールと啓発に関するニーズ調査_クロス集計_WordCloud_総務省統計反映_196_03_18.pptx
2022/03/18 13:41 3,885,088 活動支援ツールと啓発に関するニーズ調査_クロス集計_WordCloud_総務省統計反映_196_03_18_v1.pptx
2022/03/18 14:27 3,836,388 活動支援ツールと啓発に関するニーズ調査_クロス集計_WordCloud_総務省統計反映_196_03_18_v2.pptx
2022/08/04 11:12 18,774 基準データ.xlsx
2023/04/07 09:22 29,712 基準データ_2023.xlsx
2023/04/07 09:59 31,163 基準データ2.xlsx
2023/04/07 10:03 30,932 基準データ2023.xlsx
2023/04/07 09:35 29,248 基準データ22.xlsx
2022/08/04 11:49 513,441 雇用力基準データ.pptx
2022/08/05 13:43 677,937 雇用力基準データ_08_05_v2.pptx
2023/04/07 13:05 1,857,261 雇用力調査0404_r4_q2q.pptx
2023/04/07 13:43 1,979,448 雇用力調査0407_r4_final.pptx
2023/04/07 16:43 1,957,465 雇用力調査0407_r4_final2.pptx
2023/04/07 13:06 1,857,260 雇用力調査0407_r4_q2q.pptx
2023/04/10 15:41 2,969,310 雇用力調査0410_r4_final3.pptx
2023/03/31 15:21 <DIR> 集計表
164 個のファイル 170,683,237 バイト
14 個のディレクトリ 207,676,071,936 バイトの空き領域
#xlsx = pd.ExcelFile('r3_61_245447by135_v2.xlsx')
#df = pd.read_excel(xlsx, '05_23', index_col=None, header=0)
import codecs
with codecs.open("WA_Fn-UseC_-HR-Employee-Attrition.csv", mode ="r", encoding ="utf-8", errors="ignore") as file:
df = pd.read_csv(file, delimiter =",", header=0)
df
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8 | 0 | 1 | 6 | 4 | 0 | 5 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10 | 3 | 3 | 10 | 7 | 1 | 7 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7 | 3 | 3 | 0 | 0 | 0 | 0 |
| 3 | 33 | No | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | ... | 3 | 80 | 0 | 8 | 3 | 3 | 8 | 7 | 3 | 0 |
| 4 | 27 | No | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | ... | 4 | 80 | 1 | 6 | 3 | 3 | 2 | 2 | 2 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1465 | 36 | No | Travel_Frequently | 884 | Research & Development | 23 | 2 | Medical | 1 | 2061 | ... | 3 | 80 | 1 | 17 | 3 | 3 | 5 | 2 | 0 | 3 |
| 1466 | 39 | No | Travel_Rarely | 613 | Research & Development | 6 | 1 | Medical | 1 | 2062 | ... | 1 | 80 | 1 | 9 | 5 | 3 | 7 | 7 | 1 | 7 |
| 1467 | 27 | No | Travel_Rarely | 155 | Research & Development | 4 | 3 | Life Sciences | 1 | 2064 | ... | 2 | 80 | 1 | 6 | 0 | 3 | 6 | 2 | 0 | 3 |
| 1468 | 49 | No | Travel_Frequently | 1023 | Sales | 2 | 3 | Medical | 1 | 2065 | ... | 4 | 80 | 0 | 17 | 3 | 2 | 9 | 6 | 0 | 8 |
| 1469 | 34 | No | Travel_Rarely | 628 | Research & Development | 8 | 3 | Medical | 1 | 2068 | ... | 1 | 80 | 0 | 6 | 3 | 4 | 4 | 3 | 1 | 2 |
1470 rows × 35 columns
dfbk = df
df = dfbk
for col in df.columns:
print(col, len(df[col].unique()), df[col].unique())
Age 43 [41 49 37 33 27 32 59 30 38 36 35 29 31 34 28 22 53 24 21 42 44 46 39 43 50 26 48 55 45 56 23 51 40 54 58 20 25 19 57 52 47 18 60] Attrition 2 ['Yes' 'No'] BusinessTravel 3 ['Travel_Rarely' 'Travel_Frequently' 'Non-Travel'] DailyRate 886 [1102 279 1373 1392 591 1005 1324 1358 216 1299 809 153 670 1346 103 1389 334 1123 1219 371 673 1218 419 391 699 1282 1125 691 477 705 924 1459 125 895 813 1273 869 890 852 1141 464 1240 1357 994 721 1360 1065 408 1211 1229 626 1434 1488 1097 1443 515 853 1142 655 1115 427 653 989 1435 1223 836 1195 1339 664 318 1225 1328 1082 548 132 746 776 193 397 945 1214 111 573 1153 1400 541 432 288 669 530 632 1334 638 1093 1217 1353 120 682 489 807 827 871 665 1040 1420 240 1280 534 1456 658 142 1127 1031 1189 1354 1467 922 394 1312 750 441 684 249 841 147 528 594 470 957 542 802 1355 1150 1329 959 1033 1316 364 438 689 201 1427 857 933 1181 1395 662 1436 194 967 1496 1169 1145 630 303 1256 440 1450 1452 465 702 1157 602 1480 1268 713 134 526 1380 140 629 1356 328 1084 931 692 1069 313 894 556 1344 290 138 926 1261 472 1002 878 905 1180 121 1136 635 1151 644 1045 829 1242 1469 896 992 1052 1147 1396 663 119 979 319 1413 944 1323 532 818 854 1034 771 1401 1431 976 1411 1300 252 1327 832 1017 1199 504 505 916 1247 685 269 1416 833 307 1311 128 488 529 1210 1463 675 1385 1403 452 666 1158 228 996 728 1315 322 1479 797 1070 442 496 1372 920 688 1449 1117 636 506 444 950 889 555 230 1232 566 1302 812 1476 218 1132 1105 906 849 390 106 1249 192 553 117 185 1091 723 1220 588 1377 1018 1275 798 672 1162 508 1482 559 210 928 1001 549 1124 738 570 1130 1192 343 144 1296 1309 483 810 544 1062 1319 641 1332 756 845 593 1171 350 921 1144 143 1046 575 156 1283 755 304 1178 329 1362 1371 202 253 164 1107 759 1305 982 821 1381 480 1473 891 1063 645 1490 317 422 1485 1368 1448 296 1398 1349 986 1099 1116 1499 983 1009 1303 1274 1277 587 413 1276 988 1474 163 267 619 302 443 828 561 426 232 1306 1094 509 775 195 258 471 799 956 535 1495 446 1245 703 823 1246 622 1287 448 254 1365 538 525 558 782 362 1236 1112 204 1343 604 1216 646 160 238 1397 306 991 482 1176 913 1076 727 885 243 806 817 1410 1207 1442 693 929 562 608 580 970 1179 294 314 316 654 168 381 217 501 650 141 804 975 1090 346 430 268 167 621 527 883 954 310 719 725 715 657 1146 182 376 571 384 791 1111 1243 1092 1325 805 213 118 676 1252 286 1258 932 1041 859 720 946 1184 436 589 760 887 1318 625 180 586 1012 661 930 342 1230 1271 1278 607 130 300 583 1418 1269 379 395 1265 1222 341 868 1231 102 881 1383 1075 374 1086 781 177 500 1425 1454 617 1085 995 1122 618 546 462 1198 1272 154 1137 1188 188 1333 867 263 938 129 616 498 1404 1053 289 1376 231 152 882 903 1379 335 722 461 974 1126 840 1134 248 955 939 1391 1206 287 1441 109 1066 277 466 1055 265 135 247 1035 266 145 1038 1234 1109 1089 788 124 660 1186 1464 796 415 769 1003 1366 330 1492 1204 309 1330 469 697 1262 1050 770 406 203 1308 984 439 793 1451 1182 174 490 718 433 773 603 874 367 199 481 647 1384 902 819 862 1457 977 942 1402 1421 1361 917 200 150 179 696 116 363 107 1465 458 1212 1103 966 1010 326 1098 969 1167 694 1320 536 373 599 251 131 237 1429 648 735 531 429 968 879 640 412 848 360 1138 325 1322 299 1030 634 524 256 1060 935 495 282 206 943 523 507 601 855 1291 1405 1369 999 1202 285 404 736 1498 1200 1439 499 205 683 1462 949 652 332 1475 337 971 1174 667 560 172 383 1255 359 401 377 592 1445 1221 866 981 447 1326 748 990 405 115 790 830 1193 1423 467 271 410 1083 516 224 136 1029 333 1440 674 1342 898 824 492 598 740 888 1288 104 1108 479 1351 474 437 884 1370 264 1059 563 457 1313 241 1015 336 1387 170 208 671 711 737 1470 365 763 567 486 772 301 311 584 880 392 148 708 1259 786 370 678 146 581 918 1238 585 741 552 369 717 543 964 792 611 176 897 600 1054 428 181 211 1079 590 305 953 478 1375 244 511 1294 196 734 1239 1253 1128 1336 234 766 261 1194 431 572 1422 1297 574 355 207 706 280 726 414 352 1224 459 1254 1131 835 1172 1266 783 219 1213 1096 1251 1394 605 1064 1337 937 157 754 1168 155 1444 189 911 1321 1154 557 642 801 161 1382 1037 105 582 704 345 1120 1378 468 613 1023 628] Department 3 ['Sales' 'Research & Development' 'Human Resources'] DistanceFromHome 29 [ 1 8 2 3 24 23 27 16 15 26 19 21 5 11 9 7 6 10 4 25 12 18 29 22 14 20 28 17 13] Education 5 [2 1 4 3 5] EducationField 6 ['Life Sciences' 'Other' 'Medical' 'Marketing' 'Technical Degree' 'Human Resources'] EmployeeCount 1 [1] EmployeeNumber 1470 [ 1 2 4 ... 2064 2065 2068] EnvironmentSatisfaction 4 [2 3 4 1] Gender 2 ['Female' 'Male'] HourlyRate 71 [ 94 61 92 56 40 79 81 67 44 84 49 31 93 50 51 80 96 78 45 82 53 83 58 72 48 42 41 86 97 75 33 37 73 98 36 47 71 30 43 99 59 95 57 76 87 66 55 32 52 70 62 64 63 60 100 46 39 77 35 91 54 34 90 65 88 85 89 68 69 74 38] JobInvolvement 4 [3 2 4 1] JobLevel 5 [2 1 3 4 5] JobRole 9 ['Sales Executive' 'Research Scientist' 'Laboratory Technician' 'Manufacturing Director' 'Healthcare Representative' 'Manager' 'Sales Representative' 'Research Director' 'Human Resources'] JobSatisfaction 4 [4 2 3 1] MaritalStatus 3 ['Single' 'Married' 'Divorced'] MonthlyIncome 1349 [5993 5130 2090 ... 9991 5390 4404] MonthlyRate 1427 [19479 24907 2396 ... 5174 13243 10228] NumCompaniesWorked 10 [8 1 6 9 0 4 5 2 7 3] Over18 1 ['Y'] OverTime 2 ['Yes' 'No'] PercentSalaryHike 15 [11 23 15 12 13 20 22 21 17 14 16 18 19 24 25] PerformanceRating 2 [3 4] RelationshipSatisfaction 4 [1 4 2 3] StandardHours 1 [80] StockOptionLevel 4 [0 1 3 2] TotalWorkingYears 40 [ 8 10 7 6 12 1 17 5 3 31 13 0 26 24 22 9 19 2 23 14 15 4 29 28 21 25 20 11 16 37 38 30 40 18 36 34 32 33 35 27] TrainingTimesLastYear 7 [0 3 2 5 1 4 6] WorkLifeBalance 4 [1 3 2 4] YearsAtCompany 37 [ 6 10 0 8 2 7 1 9 5 4 25 3 12 14 22 15 27 21 17 11 13 37 16 20 40 24 33 19 36 18 29 31 32 34 26 30 23] YearsInCurrentRole 19 [ 4 7 0 2 5 9 8 3 6 13 1 15 14 16 11 10 12 18 17] YearsSinceLastPromotion 16 [ 0 1 3 2 7 4 8 6 5 15 9 13 12 10 11 14] YearsWithCurrManager 18 [ 5 7 0 2 6 8 3 11 17 1 4 12 9 10 15 13 16 14]
## Check for unique values of categorical variables
df_overview = pd.DataFrame([[i, len(df[i].unique()), df[i].dtypes, df[i].isnull().sum()] for i in df.columns],
columns=['Feature', 'Unique Values', 'dtypes', 'NaN']).set_index('Feature')
print(df.shape)
df_overview
(1470, 35)
| Unique Values | dtypes | NaN | |
|---|---|---|---|
| Feature | |||
| Age | 43 | int64 | 0 |
| Attrition | 2 | object | 0 |
| BusinessTravel | 3 | object | 0 |
| DailyRate | 886 | int64 | 0 |
| Department | 3 | object | 0 |
| DistanceFromHome | 29 | int64 | 0 |
| Education | 5 | int64 | 0 |
| EducationField | 6 | object | 0 |
| EmployeeCount | 1 | int64 | 0 |
| EmployeeNumber | 1470 | int64 | 0 |
| EnvironmentSatisfaction | 4 | int64 | 0 |
| Gender | 2 | object | 0 |
| HourlyRate | 71 | int64 | 0 |
| JobInvolvement | 4 | int64 | 0 |
| JobLevel | 5 | int64 | 0 |
| JobRole | 9 | object | 0 |
| JobSatisfaction | 4 | int64 | 0 |
| MaritalStatus | 3 | object | 0 |
| MonthlyIncome | 1349 | int64 | 0 |
| MonthlyRate | 1427 | int64 | 0 |
| NumCompaniesWorked | 10 | int64 | 0 |
| Over18 | 1 | object | 0 |
| OverTime | 2 | object | 0 |
| PercentSalaryHike | 15 | int64 | 0 |
| PerformanceRating | 2 | int64 | 0 |
| RelationshipSatisfaction | 4 | int64 | 0 |
| StandardHours | 1 | int64 | 0 |
| StockOptionLevel | 4 | int64 | 0 |
| TotalWorkingYears | 40 | int64 | 0 |
| TrainingTimesLastYear | 7 | int64 | 0 |
| WorkLifeBalance | 4 | int64 | 0 |
| YearsAtCompany | 37 | int64 | 0 |
| YearsInCurrentRole | 19 | int64 | 0 |
| YearsSinceLastPromotion | 16 | int64 | 0 |
| YearsWithCurrManager | 18 | int64 | 0 |
df = df.iloc[:, [0,1,2,4,5,6,7,11,8,9]]
## Check for unique values of categorical variables
df_overview = pd.DataFrame([[i, len(df[i].unique()), df[i].dtypes, df[i].isnull().sum()] for i in df.columns],
columns=['Feature', 'Unique Values', 'dtypes', 'NaN']).set_index('Feature')
print(df.shape)
df_overview
(1470, 10)
| Unique Values | dtypes | NaN | |
|---|---|---|---|
| Feature | |||
| Age | 43 | int64 | 0 |
| Attrition | 2 | object | 0 |
| BusinessTravel | 3 | object | 0 |
| Department | 3 | object | 0 |
| DistanceFromHome | 29 | int64 | 0 |
| Education | 5 | int64 | 0 |
| EducationField | 6 | object | 0 |
| Gender | 2 | object | 0 |
| EmployeeCount | 1 | int64 | 0 |
| EmployeeNumber | 1470 | int64 | 0 |
for col in df.columns:
print(col, len(df[col].unique()), df[col].unique())
Age 43 [41 49 37 33 27 32 59 30 38 36 35 29 31 34 28 22 53 24 21 42 44 46 39 43 50 26 48 55 45 56 23 51 40 54 58 20 25 19 57 52 47 18 60] Attrition 2 ['Yes' 'No'] BusinessTravel 3 ['Travel_Rarely' 'Travel_Frequently' 'Non-Travel'] Department 3 ['Sales' 'Research & Development' 'Human Resources'] DistanceFromHome 29 [ 1 8 2 3 24 23 27 16 15 26 19 21 5 11 9 7 6 10 4 25 12 18 29 22 14 20 28 17 13] Education 5 [2 1 4 3 5] EducationField 6 ['Life Sciences' 'Other' 'Medical' 'Marketing' 'Technical Degree' 'Human Resources'] Gender 2 ['Female' 'Male'] EmployeeCount 1 [1] EmployeeNumber 1470 [ 1 2 4 ... 2064 2065 2068]
df.head()
| Age | Attrition | BusinessTravel | Department | DistanceFromHome | Education | EducationField | Gender | EmployeeCount | EmployeeNumber | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | Sales | 1 | 2 | Life Sciences | Female | 1 | 1 |
| 1 | 49 | No | Travel_Frequently | Research & Development | 8 | 1 | Life Sciences | Male | 1 | 2 |
| 2 | 37 | Yes | Travel_Rarely | Research & Development | 2 | 2 | Other | Male | 1 | 4 |
| 3 | 33 | No | Travel_Frequently | Research & Development | 3 | 4 | Life Sciences | Female | 1 | 5 |
| 4 | 27 | No | Travel_Rarely | Research & Development | 2 | 1 | Medical | Male | 1 | 7 |
df['EducationField'].value_counts()
Life Sciences 606 Medical 464 Marketing 159 Technical Degree 132 Other 82 Human Resources 27 Name: EducationField, dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1470 entries, 0 to 1469 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 1470 non-null int64 1 Attrition 1470 non-null object 2 BusinessTravel 1470 non-null object 3 Department 1470 non-null object 4 DistanceFromHome 1470 non-null int64 5 Education 1470 non-null int64 6 EducationField 1470 non-null object 7 Gender 1470 non-null object 8 EmployeeCount 1470 non-null int64 9 EmployeeNumber 1470 non-null int64 dtypes: int64(5), object(5) memory usage: 115.0+ KB
df.describe()
| Age | DistanceFromHome | Education | EmployeeCount | EmployeeNumber | |
|---|---|---|---|---|---|
| count | 1470.000000 | 1470.000000 | 1470.000000 | 1470.0 | 1470.000000 |
| mean | 36.923810 | 9.192517 | 2.912925 | 1.0 | 1024.865306 |
| std | 9.135373 | 8.106864 | 1.024165 | 0.0 | 602.024335 |
| min | 18.000000 | 1.000000 | 1.000000 | 1.0 | 1.000000 |
| 25% | 30.000000 | 2.000000 | 2.000000 | 1.0 | 491.250000 |
| 50% | 36.000000 | 7.000000 | 3.000000 | 1.0 | 1020.500000 |
| 75% | 43.000000 | 14.000000 | 4.000000 | 1.0 | 1555.750000 |
| max | 60.000000 | 29.000000 | 5.000000 | 1.0 | 2068.000000 |
plt.rcParams['figure.figsize'] = 10, 6
col_name = 'YearsAtCompany'
sns.countplot(x=col_name, data=df, palette='hls')
#seaborn countplotを設定し、変数axとしてAnnotationを上書できるようにする
ax = sns.countplot(x = col_name,
data = df)
#Annotation を設定する
for p in ax.patches:
ax.annotate(format(p.get_height(), '.0f'),
(p.get_x() + p.get_width() / 2.,
p.get_height()),
ha = 'center',
va = 'center',
xytext = (0, 9),
textcoords = 'offset points',
fontsize = 8,
color = 'blue')
df[col_name].describe()
count 1470.000000 mean 7.008163 std 6.126525 min 0.000000 25% 3.000000 50% 5.000000 75% 9.000000 max 40.000000 Name: YearsAtCompany, dtype: float64
col_name = 'TotalWorkingYears'
sns.countplot(x=col_name, data=df, palette='hls')
#seaborn countplotを設定し、変数axとしてAnnotationを上書できるようにする
ax = sns.countplot(x = col_name,
data = df)
#Annotation を設定する
for p in ax.patches:
ax.annotate(format(p.get_height(), '.0f'),
(p.get_x() + p.get_width() / 2.,
p.get_height()),
ha = 'center',
va = 'center',
xytext = (0, 9),
textcoords = 'offset points',
fontsize = 8,
color = 'blue')
df[col_name].describe()
count 1470.000000 mean 11.279592 std 7.780782 min 0.000000 25% 6.000000 50% 10.000000 75% 15.000000 max 40.000000 Name: TotalWorkingYears, dtype: float64
col_name = 'Age'
sns.countplot(x=col_name, data=df, palette='hls')
#seaborn countplotを設定し、変数axとしてAnnotationを上書できるようにする
ax = sns.countplot(x = col_name,
data = df)
#Annotation を設定する
for p in ax.patches:
ax.annotate(format(p.get_height(), '.0f'),
(p.get_x() + p.get_width() / 2.,
p.get_height()),
ha = 'center',
va = 'center',
xytext = (0, 9),
textcoords = 'offset points',
fontsize = 8,
color = 'blue')
df[col_name].describe()
count 1470.000000 mean 36.923810 std 9.135373 min 18.000000 25% 30.000000 50% 36.000000 75% 43.000000 max 60.000000 Name: Age, dtype: float64
x_name = 'Age'
y_name = 'MonthlyRate'
sns.lineplot(data=df, x=x_name, y=y_name)
<AxesSubplot: xlabel='Age', ylabel='MonthlyRate'>
# 1,000ドル単位に下3桁で切り捨てて、それを列名'MonthlyRateに格納します
df['MonthlyRate'] = df['MonthlyIncome'].round(-3)
# MonthlyRateの度数分布を作成します
sns.countplot(x='MonthlyRate', data=df, palette='hls')
<AxesSubplot: xlabel='MonthlyRate', ylabel='count'>
ct = pd.crosstab(df['Age'], df['MonthlyRate'])
ct
| MonthlyRate | 1000 | 2000 | 3000 | 4000 | 5000 | 6000 | 7000 | 8000 | 9000 | 10000 | 11000 | 12000 | 13000 | 14000 | 15000 | 16000 | 17000 | 18000 | 19000 | 20000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | ||||||||||||||||||||
| 18 | 3 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19 | 2 | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 20 | 1 | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 21 | 2 | 3 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 22 | 0 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 23 | 1 | 7 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 24 | 0 | 8 | 7 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 25 | 1 | 4 | 3 | 9 | 5 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 26 | 0 | 11 | 11 | 8 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 27 | 0 | 13 | 11 | 9 | 4 | 5 | 2 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 28 | 1 | 12 | 14 | 5 | 8 | 1 | 3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 29 | 1 | 16 | 15 | 10 | 6 | 5 | 4 | 5 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 30 | 2 | 11 | 11 | 7 | 10 | 4 | 6 | 2 | 2 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 31 | 3 | 6 | 13 | 10 | 8 | 7 | 8 | 3 | 1 | 4 | 3 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 32 | 1 | 7 | 12 | 9 | 13 | 6 | 4 | 0 | 2 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 33 | 0 | 8 | 14 | 6 | 11 | 5 | 3 | 3 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 34 | 0 | 8 | 15 | 11 | 16 | 9 | 5 | 3 | 2 | 5 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 35 | 1 | 12 | 9 | 12 | 14 | 6 | 4 | 4 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 36 | 0 | 5 | 13 | 5 | 13 | 10 | 7 | 6 | 3 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 37 | 0 | 4 | 7 | 13 | 3 | 8 | 4 | 1 | 2 | 3 | 1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 38 | 0 | 6 | 11 | 6 | 8 | 8 | 5 | 3 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 39 | 0 | 3 | 1 | 6 | 8 | 5 | 1 | 3 | 0 | 2 | 3 | 2 | 3 | 0 | 0 | 0 | 2 | 0 | 3 | 0 |
| 40 | 0 | 8 | 5 | 8 | 5 | 6 | 4 | 2 | 1 | 4 | 2 | 0 | 3 | 2 | 0 | 1 | 1 | 1 | 2 | 2 |
| 41 | 0 | 5 | 5 | 2 | 4 | 5 | 3 | 2 | 3 | 1 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 0 | 3 | 2 |
| 42 | 0 | 2 | 7 | 6 | 9 | 4 | 4 | 0 | 0 | 1 | 0 | 0 | 2 | 2 | 0 | 1 | 1 | 4 | 2 | 1 |
| 43 | 0 | 5 | 1 | 2 | 6 | 3 | 1 | 2 | 0 | 2 | 2 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 1 |
| 44 | 0 | 6 | 7 | 1 | 4 | 4 | 0 | 2 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 2 | 1 |
| 45 | 0 | 5 | 2 | 6 | 6 | 5 | 2 | 0 | 2 | 2 | 3 | 0 | 2 | 0 | 0 | 0 | 2 | 2 | 2 | 0 |
| 46 | 0 | 0 | 1 | 0 | 4 | 2 | 2 | 1 | 4 | 4 | 3 | 0 | 1 | 0 | 0 | 0 | 4 | 2 | 4 | 1 |
| 47 | 0 | 1 | 3 | 1 | 7 | 1 | 0 | 0 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 2 | 1 | 0 | 1 |
| 48 | 0 | 2 | 2 | 2 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 2 | 0 | 2 | 1 | 0 | 1 |
| 49 | 0 | 0 | 2 | 2 | 4 | 0 | 4 | 1 | 0 | 1 | 1 | 0 | 1 | 2 | 1 | 1 | 0 | 0 | 3 | 1 |
| 50 | 0 | 1 | 2 | 2 | 1 | 1 | 2 | 0 | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 3 | 5 | 4 | 2 |
| 51 | 0 | 2 | 2 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 2 | 1 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 2 |
| 52 | 0 | 0 | 4 | 1 | 1 | 1 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 1 | 2 |
| 53 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 2 | 0 | 2 | 1 | 1 | 1 | 1 | 3 | 0 | 1 | 1 | 2 | 0 |
| 54 | 0 | 0 | 1 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 0 |
| 55 | 0 | 0 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 2 | 2 | 0 | 3 | 0 | 4 | 3 |
| 56 | 0 | 2 | 2 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 57 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58 | 0 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 2 | 0 | 1 | 1 | 1 |
| 59 | 0 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 60 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
# figsizeを変更するモジュール rcParsmsを読み込みます
from pylab import rcParams
# figsize を24 x 13インチにします
rcParams['figure.figsize'] = 24,13
# クロス集計表のデータフレーム ct のヒートマップを作成します。
# 色だけでなく、度数も表示するため、annot = True を指定します
sns.heatmap(ct, annot=True, cmap='BuGn')
# カレントディレクトリにヒートマップ図をimg_hr.pngとして保存します
plt.savefig("img_hr.png")
pwd
'D:\\jupyter'
import matplotlib.pyplot as plt
import numpy as np
x = np.random.rand(1000)
y = np.random.rand(1000)
z = np.random.rand(1000)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(x, y, z, marker=".", color='red')
plt.show()
ct
| MonthlyRate | 1000 | 2000 | 3000 | 4000 | 5000 | 6000 | 7000 | 8000 | 9000 | 10000 | 11000 | 12000 | 13000 | 14000 | 15000 | 16000 | 17000 | 18000 | 19000 | 20000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | ||||||||||||||||||||
| 18 | 3 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19 | 2 | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 20 | 1 | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 21 | 2 | 3 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 22 | 0 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 23 | 1 | 7 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 24 | 0 | 8 | 7 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 25 | 1 | 4 | 3 | 9 | 5 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 26 | 0 | 11 | 11 | 8 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 27 | 0 | 13 | 11 | 9 | 4 | 5 | 2 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 28 | 1 | 12 | 14 | 5 | 8 | 1 | 3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 29 | 1 | 16 | 15 | 10 | 6 | 5 | 4 | 5 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 30 | 2 | 11 | 11 | 7 | 10 | 4 | 6 | 2 | 2 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 31 | 3 | 6 | 13 | 10 | 8 | 7 | 8 | 3 | 1 | 4 | 3 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 32 | 1 | 7 | 12 | 9 | 13 | 6 | 4 | 0 | 2 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 33 | 0 | 8 | 14 | 6 | 11 | 5 | 3 | 3 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 34 | 0 | 8 | 15 | 11 | 16 | 9 | 5 | 3 | 2 | 5 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 35 | 1 | 12 | 9 | 12 | 14 | 6 | 4 | 4 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 36 | 0 | 5 | 13 | 5 | 13 | 10 | 7 | 6 | 3 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 37 | 0 | 4 | 7 | 13 | 3 | 8 | 4 | 1 | 2 | 3 | 1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 38 | 0 | 6 | 11 | 6 | 8 | 8 | 5 | 3 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 39 | 0 | 3 | 1 | 6 | 8 | 5 | 1 | 3 | 0 | 2 | 3 | 2 | 3 | 0 | 0 | 0 | 2 | 0 | 3 | 0 |
| 40 | 0 | 8 | 5 | 8 | 5 | 6 | 4 | 2 | 1 | 4 | 2 | 0 | 3 | 2 | 0 | 1 | 1 | 1 | 2 | 2 |
| 41 | 0 | 5 | 5 | 2 | 4 | 5 | 3 | 2 | 3 | 1 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 0 | 3 | 2 |
| 42 | 0 | 2 | 7 | 6 | 9 | 4 | 4 | 0 | 0 | 1 | 0 | 0 | 2 | 2 | 0 | 1 | 1 | 4 | 2 | 1 |
| 43 | 0 | 5 | 1 | 2 | 6 | 3 | 1 | 2 | 0 | 2 | 2 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 1 |
| 44 | 0 | 6 | 7 | 1 | 4 | 4 | 0 | 2 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 2 | 1 |
| 45 | 0 | 5 | 2 | 6 | 6 | 5 | 2 | 0 | 2 | 2 | 3 | 0 | 2 | 0 | 0 | 0 | 2 | 2 | 2 | 0 |
| 46 | 0 | 0 | 1 | 0 | 4 | 2 | 2 | 1 | 4 | 4 | 3 | 0 | 1 | 0 | 0 | 0 | 4 | 2 | 4 | 1 |
| 47 | 0 | 1 | 3 | 1 | 7 | 1 | 0 | 0 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 2 | 1 | 0 | 1 |
| 48 | 0 | 2 | 2 | 2 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 2 | 0 | 2 | 1 | 0 | 1 |
| 49 | 0 | 0 | 2 | 2 | 4 | 0 | 4 | 1 | 0 | 1 | 1 | 0 | 1 | 2 | 1 | 1 | 0 | 0 | 3 | 1 |
| 50 | 0 | 1 | 2 | 2 | 1 | 1 | 2 | 0 | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 3 | 5 | 4 | 2 |
| 51 | 0 | 2 | 2 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 2 | 1 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 2 |
| 52 | 0 | 0 | 4 | 1 | 1 | 1 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 1 | 2 |
| 53 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 2 | 0 | 2 | 1 | 1 | 1 | 1 | 3 | 0 | 1 | 1 | 2 | 0 |
| 54 | 0 | 0 | 1 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 0 |
| 55 | 0 | 0 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 2 | 2 | 0 | 3 | 0 | 4 | 3 |
| 56 | 0 | 2 | 2 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 57 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58 | 0 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 2 | 0 | 1 | 1 | 1 |
| 59 | 0 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 60 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
ct.reset_index()
| MonthlyRate | Age | 1000 | 2000 | 3000 | 4000 | 5000 | 6000 | 7000 | 8000 | 9000 | ... | 11000 | 12000 | 13000 | 14000 | 15000 | 16000 | 17000 | 18000 | 19000 | 20000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18 | 3 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 19 | 2 | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 20 | 1 | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 21 | 2 | 3 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 22 | 0 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 5 | 23 | 1 | 7 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 6 | 24 | 0 | 8 | 7 | 8 | 3 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7 | 25 | 1 | 4 | 3 | 9 | 5 | 3 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 8 | 26 | 0 | 11 | 11 | 8 | 6 | 3 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 9 | 27 | 0 | 13 | 11 | 9 | 4 | 5 | 2 | 0 | 2 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 10 | 28 | 1 | 12 | 14 | 5 | 8 | 1 | 3 | 2 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 11 | 29 | 1 | 16 | 15 | 10 | 6 | 5 | 4 | 5 | 2 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 12 | 30 | 2 | 11 | 11 | 7 | 10 | 4 | 6 | 2 | 2 | ... | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 13 | 31 | 3 | 6 | 13 | 10 | 8 | 7 | 8 | 3 | 1 | ... | 3 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 14 | 32 | 1 | 7 | 12 | 9 | 13 | 6 | 4 | 0 | 2 | ... | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 15 | 33 | 0 | 8 | 14 | 6 | 11 | 5 | 3 | 3 | 1 | ... | 0 | 2 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 16 | 34 | 0 | 8 | 15 | 11 | 16 | 9 | 5 | 3 | 2 | ... | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 17 | 35 | 1 | 12 | 9 | 12 | 14 | 6 | 4 | 4 | 6 | ... | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 18 | 36 | 0 | 5 | 13 | 5 | 13 | 10 | 7 | 6 | 3 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19 | 37 | 0 | 4 | 7 | 13 | 3 | 8 | 4 | 1 | 2 | ... | 1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 20 | 38 | 0 | 6 | 11 | 6 | 8 | 8 | 5 | 3 | 3 | ... | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 21 | 39 | 0 | 3 | 1 | 6 | 8 | 5 | 1 | 3 | 0 | ... | 3 | 2 | 3 | 0 | 0 | 0 | 2 | 0 | 3 | 0 |
| 22 | 40 | 0 | 8 | 5 | 8 | 5 | 6 | 4 | 2 | 1 | ... | 2 | 0 | 3 | 2 | 0 | 1 | 1 | 1 | 2 | 2 |
| 23 | 41 | 0 | 5 | 5 | 2 | 4 | 5 | 3 | 2 | 3 | ... | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 0 | 3 | 2 |
| 24 | 42 | 0 | 2 | 7 | 6 | 9 | 4 | 4 | 0 | 0 | ... | 0 | 0 | 2 | 2 | 0 | 1 | 1 | 4 | 2 | 1 |
| 25 | 43 | 0 | 5 | 1 | 2 | 6 | 3 | 1 | 2 | 0 | ... | 2 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 1 |
| 26 | 44 | 0 | 6 | 7 | 1 | 4 | 4 | 0 | 2 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 2 | 1 |
| 27 | 45 | 0 | 5 | 2 | 6 | 6 | 5 | 2 | 0 | 2 | ... | 3 | 0 | 2 | 0 | 0 | 0 | 2 | 2 | 2 | 0 |
| 28 | 46 | 0 | 0 | 1 | 0 | 4 | 2 | 2 | 1 | 4 | ... | 3 | 0 | 1 | 0 | 0 | 0 | 4 | 2 | 4 | 1 |
| 29 | 47 | 0 | 1 | 3 | 1 | 7 | 1 | 0 | 0 | 1 | ... | 0 | 2 | 1 | 1 | 0 | 1 | 2 | 1 | 0 | 1 |
| 30 | 48 | 0 | 2 | 2 | 2 | 2 | 1 | 0 | 2 | 0 | ... | 1 | 0 | 1 | 0 | 2 | 0 | 2 | 1 | 0 | 1 |
| 31 | 49 | 0 | 0 | 2 | 2 | 4 | 0 | 4 | 1 | 0 | ... | 1 | 0 | 1 | 2 | 1 | 1 | 0 | 0 | 3 | 1 |
| 32 | 50 | 0 | 1 | 2 | 2 | 1 | 1 | 2 | 0 | 1 | ... | 2 | 0 | 1 | 2 | 0 | 0 | 3 | 5 | 4 | 2 |
| 33 | 51 | 0 | 2 | 2 | 0 | 3 | 1 | 1 | 0 | 0 | ... | 2 | 1 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 2 |
| 34 | 52 | 0 | 0 | 4 | 1 | 1 | 1 | 0 | 2 | 0 | ... | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 1 | 2 |
| 35 | 53 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 2 | 0 | ... | 1 | 1 | 1 | 1 | 3 | 0 | 1 | 1 | 2 | 0 |
| 36 | 54 | 0 | 0 | 1 | 2 | 2 | 1 | 1 | 0 | 0 | ... | 4 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 0 |
| 37 | 55 | 0 | 0 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 2 | 2 | 0 | 3 | 0 | 4 | 3 |
| 38 | 56 | 0 | 2 | 2 | 1 | 2 | 1 | 1 | 0 | 0 | ... | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 39 | 57 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40 | 58 | 0 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 2 | 0 | 1 | 1 | 1 |
| 41 | 59 | 0 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | ... | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 42 | 60 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
43 rows × 21 columns
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
df = pd.DataFrame({'A':[0,1,2,3,4],
'B':[0,1,4,9,16],
'C':[0,1,8,27,64]})
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter(df.A, df.B, df.C, s=100)
df
| A | B | C | |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 1 | 1 | 1 |
| 2 | 2 | 4 | 8 |
| 3 | 3 | 9 | 27 |
| 4 | 4 | 16 | 64 |
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
df = pd.DataFrame({'A':[0,1,2,3,4],
'B':[0,1,4,9,16],
'C':[0,1,8,27,64]})
#文字だとプロットできないのでcolumn名を数字に変更
df.rename(columns={'A':1, 'B':2, 'C':3}, inplace=True)
X = np.array([])
Y = np.array([])
Z = np.array([])
for i in range(df.index.size):
X = np.concatenate([X, np.full(df.columns.size, df.index[i])], 0)
for i in range(df.index.size):
Y = np.concatenate([Y, np.array(df.columns)], 0)
for i in range(df.index.size):
Z = np.concatenate([Z, np.array(df[i:i+1])[0]], 0)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter(X, Y, Z)
df
| 1 | 2 | 3 | |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 1 | 1 | 1 |
| 2 | 2 | 4 | 8 |
| 3 | 3 | 9 | 27 |
| 4 | 4 | 16 | 64 |
ct
| MonthlyRate | 1000 | 2000 | 3000 | 4000 | 5000 | 6000 | 7000 | 8000 | 9000 | 10000 | 11000 | 12000 | 13000 | 14000 | 15000 | 16000 | 17000 | 18000 | 19000 | 20000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | ||||||||||||||||||||
| 18 | 3 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19 | 2 | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 20 | 1 | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 21 | 2 | 3 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 22 | 0 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 23 | 1 | 7 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 24 | 0 | 8 | 7 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 25 | 1 | 4 | 3 | 9 | 5 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 26 | 0 | 11 | 11 | 8 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 27 | 0 | 13 | 11 | 9 | 4 | 5 | 2 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 28 | 1 | 12 | 14 | 5 | 8 | 1 | 3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 29 | 1 | 16 | 15 | 10 | 6 | 5 | 4 | 5 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 30 | 2 | 11 | 11 | 7 | 10 | 4 | 6 | 2 | 2 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 31 | 3 | 6 | 13 | 10 | 8 | 7 | 8 | 3 | 1 | 4 | 3 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 32 | 1 | 7 | 12 | 9 | 13 | 6 | 4 | 0 | 2 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 33 | 0 | 8 | 14 | 6 | 11 | 5 | 3 | 3 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 34 | 0 | 8 | 15 | 11 | 16 | 9 | 5 | 3 | 2 | 5 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 35 | 1 | 12 | 9 | 12 | 14 | 6 | 4 | 4 | 6 | 7 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 36 | 0 | 5 | 13 | 5 | 13 | 10 | 7 | 6 | 3 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 37 | 0 | 4 | 7 | 13 | 3 | 8 | 4 | 1 | 2 | 3 | 1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 38 | 0 | 6 | 11 | 6 | 8 | 8 | 5 | 3 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 39 | 0 | 3 | 1 | 6 | 8 | 5 | 1 | 3 | 0 | 2 | 3 | 2 | 3 | 0 | 0 | 0 | 2 | 0 | 3 | 0 |
| 40 | 0 | 8 | 5 | 8 | 5 | 6 | 4 | 2 | 1 | 4 | 2 | 0 | 3 | 2 | 0 | 1 | 1 | 1 | 2 | 2 |
| 41 | 0 | 5 | 5 | 2 | 4 | 5 | 3 | 2 | 3 | 1 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 0 | 3 | 2 |
| 42 | 0 | 2 | 7 | 6 | 9 | 4 | 4 | 0 | 0 | 1 | 0 | 0 | 2 | 2 | 0 | 1 | 1 | 4 | 2 | 1 |
| 43 | 0 | 5 | 1 | 2 | 6 | 3 | 1 | 2 | 0 | 2 | 2 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 1 |
| 44 | 0 | 6 | 7 | 1 | 4 | 4 | 0 | 2 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 2 | 1 |
| 45 | 0 | 5 | 2 | 6 | 6 | 5 | 2 | 0 | 2 | 2 | 3 | 0 | 2 | 0 | 0 | 0 | 2 | 2 | 2 | 0 |
| 46 | 0 | 0 | 1 | 0 | 4 | 2 | 2 | 1 | 4 | 4 | 3 | 0 | 1 | 0 | 0 | 0 | 4 | 2 | 4 | 1 |
| 47 | 0 | 1 | 3 | 1 | 7 | 1 | 0 | 0 | 1 | 1 | 0 | 2 | 1 | 1 | 0 | 1 | 2 | 1 | 0 | 1 |
| 48 | 0 | 2 | 2 | 2 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 2 | 0 | 2 | 1 | 0 | 1 |
| 49 | 0 | 0 | 2 | 2 | 4 | 0 | 4 | 1 | 0 | 1 | 1 | 0 | 1 | 2 | 1 | 1 | 0 | 0 | 3 | 1 |
| 50 | 0 | 1 | 2 | 2 | 1 | 1 | 2 | 0 | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 3 | 5 | 4 | 2 |
| 51 | 0 | 2 | 2 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 2 | 1 | 2 | 2 | 0 | 1 | 0 | 0 | 0 | 2 |
| 52 | 0 | 0 | 4 | 1 | 1 | 1 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 1 | 2 |
| 53 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 2 | 0 | 2 | 1 | 1 | 1 | 1 | 3 | 0 | 1 | 1 | 2 | 0 |
| 54 | 0 | 0 | 1 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 1 | 3 | 1 | 1 | 0 |
| 55 | 0 | 0 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 2 | 2 | 0 | 3 | 0 | 4 | 3 |
| 56 | 0 | 2 | 2 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 57 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58 | 0 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 2 | 0 | 1 | 1 | 1 |
| 59 | 0 | 2 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 60 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
#ct2 = ct.reset_index()
#ct2
#ct4 = ct2.set_index('Age')
#ct4
df = ct
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
#文字だとプロットできないのでcolumn名を数字に変更
#df.rename(columns={'A':1, 'B':2, 'C':3}, inplace=True)
X = np.array([])
Y = np.array([])
Z = np.array([])
for i in range(df.index.size):
X = np.concatenate([X, np.full(df.columns.size, df.index[i])], 0)
for i in range(df.index.size):
Y = np.concatenate([Y, np.array(df.columns)], 0)
for i in range(df.index.size):
Z = np.concatenate([Z, np.array(df[i:i+1])[0]], 0)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter(X, Y, Z)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
#タイトルで漢字が使えるようフォントを設定
plt.rcParams['font.family'] = 'Meiryo'
#描画エリアの作成
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
#グラフタイトルを設定
ax.set_title("3D散布図",size=20)
#軸ラベルのサイズと色を設定
ax.set_xlabel("x軸",size=15,color="black")
ax.set_ylabel("y軸",size=15,color="black")
ax.set_zlabel("z軸",size=15,color="black")
#numpyを使ってXYZの値を設定
x = np.random.rand(200,1)
y = np.random.rand(200,1)
z = np.random.rand(200,1)
#散布図の作成
ax.scatter(x,y,z,s=40,c="red")
#描画
plt.show()
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
#タイトルで漢字が使えるようフォントを設定
plt.rcParams['font.family'] = 'Meiryo'
#描画エリアの作成
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
#グラフタイトルを設定
ax.set_title("3D散布図",size=20)
#軸ラベルのサイズと色を設定
ax.set_xlabel("x軸",size=15,color="black")
ax.set_ylabel("y軸",size=15,color="black")
ax.set_zlabel("z軸",size=15,color="black")
#x,yデータの作成
data = np.linspace(-3*np.pi,3*np.pi,50)
x, y = np.meshgrid(data,data)
# zデータの作成
z = np.cos(x/np.pi)*np.sin(y/np.pi)
#散布図の作成
#ワイヤーフレームを描く
ax.plot_wireframe(x,y,z,color="darkblue")
#ax.scatter(x,y,z,s=1,c="red")
#描画
plt.show()
import numpy as np
x = np.arange(0, 6, 1)
y = np.arange(0, 4, 1)
X, Y = np.meshgrid(x, y)
print(X)
[[0 1 2 3 4 5] [0 1 2 3 4 5] [0 1 2 3 4 5] [0 1 2 3 4 5]]
import numpy as np
x = np.arange(0, 6, 1)
y = np.arange(0, 6, 1)
Z = np.meshgrid(x, y)
print(Z)
[array([[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5]]), array([[0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1],
[2, 2, 2, 2, 2, 2],
[3, 3, 3, 3, 3, 3],
[4, 4, 4, 4, 4, 4],
[5, 5, 5, 5, 5, 5]])]
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
#タイトルで漢字が使えるようフォントを設定
plt.rcParams['font.family'] = 'Meiryo'
#描画エリアの作成
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
#グラフタイトルを設定
ax.set_title("3D散布図",size=20)
#軸ラベルのサイズと色を設定
ax.set_xlabel("x軸",size=15,color="black")
ax.set_ylabel("y軸",size=15,color="black")
ax.set_zlabel("z軸",size=15,color="black")
#numpyを使ってXYZの値を設定
#x = np.random.rand(200,1)
#y = np.random.rand(200,1)
#Z = np.random.rand(200,1)
#散布図の作成
ax.plot_wireframe(x,y,Z,s=40,c="red")
#描画
plt.show()
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import multivariate_normal
N = 60
X = np.linspace(-3, 3, N)
Y = np.linspace(-3, 4, N)
X, Y = np.meshgrid(X, Y)
mu = np.array([0., 1.])
Sigma = np.array([[ 1. , -0.5], [-0.5, 1.5]])
pos = np.empty(X.shape + (2,))
pos[:, :, 0] = X
pos[:, :, 1] = Y
F = multivariate_normal(mu, Sigma)
Z = F.pdf(pos)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
#ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z, rstride=3, cstride=3, linewidth=1, antialiased=True, cmap=cm.viridis)
plt.show()
print(z)
ct4
ct3.to_csv('ct4.csv', encoding='utf-8', index=False)
ct3 = ct2.reset_index()
ct3
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
X = np.array([])
Y = np.array([])
Z = np.array([])
for i in range(df.index.size):
X = np.concatenate([X, np.full(df.columns.size, df.index[i])], 0)
for i in range(df.index.size):
Y = np.concatenate([Y, np.array(df.columns)], 0)
for i in range(df.index.size):
Z = np.concatenate([Z, np.array(df[i:i+1])[0]], 0)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sc = ax.bar3d(X, Y, Z)
df['Age']
df['MonthlyRate']
#input
import matplotlib.pyplot as plt
import numpy as np
#fig = plt.figure()
ax = fig.add_subplot(projection='3d')
x = df['Age']
y = df['MonthlyRate']
xx, yy = np.meshgrid(x, y)
xr = xx.ravel()
yr = yy.ravel()
height = xr + yr + 1
bottom = np.ones_like(height)
width = 0.5
depth = 0.5
#ax.bar3d(xr, yr, bottom, width, depth, height, color='lime')
plt.show()
#input
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
x = np.arange(3)
y = np.arange(3)
xx, yy = np.meshgrid(x, y)
xr = xx.ravel()
yr = yy.ravel()
height = xr + yr + 1
bottom = np.ones_like(height)
width = 0.5
depth = 0.5
ax.bar3d(xr, yr, bottom, width, depth, height, color='lime')
plt.show()
print(xr)
print(xx)
## Drop veil-type because it offers no information
print("Before", df.shape)
df = df.drop(['EmployeeCount', 'Over18', 'StandardHours'], axis = 1)
print("After", df.shape)
df_overview = df_overview.drop(['EmployeeCount', 'Over18', 'StandardHours'], axis = 0)
df_overview
## Examine unique values
for i in df.columns:
print(i, df[i].unique())
df.shape
#mkdir sqliteDB
cd sqliteDB
ls
import sqlite3
#HR_Employee_Attrition.db という名前のDBをなければ作成して接続する
dbname = 'HR_Employee_Attrition.db'
conn = sqlite3.connect(dbname)
cur = conn.cursor()
#DBファイルが作成されたかを確認する
%ls
# tableのnameを"HR_Employee_Attrition_tab"とし、読み込んだcsvファイルをsqlに書き込む
# index=False としてindex は書き込まないようにする
# DBには今回のテーブルが一つだけあることになる
# if_existsで書き換える
df.to_sql('HR_Employee_Attrition_tab', conn, if_exists='replace', index=False)
#DBファイルにデータが書き込まれたことを確認する
%ls
dbname = "HR_Employee_Attrition.db"
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをpandasで読み出す。
df = pd.read_sql('SELECT * FROM HR_Employee_Attrition_tab', conn)
cur.close()
conn.close()
df
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをSQL 文を使ってpandas Dataframeで読み出す。
df = pd.read_sql('SELECT "Department", "Age", "Attrition", "BusinessTravel", "DailyRate" FROM HR_Employee_Attrition_tab WHERE "BusinessTravel"="Travel_Frequently" AND "DailyRate" > 1350' , conn)
df
query = """
SELECT
"Department",
"Age",
"Attrition",
"BusinessTravel",
"DailyRate"
FROM "HR_Employee_Attrition_tab"
WHERE "BusinessTravel"="Travel_Frequently"
AND "DailyRate" < 1350
AND "Age" > 36
"""
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをSQL 文を使ってpandas Dataframeで読み出す。
df = pd.read_sql(query, conn)
df
dbname = "HR_Employee_Attrition.db"
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをpandasで読み出す。
# WHERE で条件に合致した要素のみを読み込み、それをDataFrame に格納する
df = pd.read_sql('SELECT "Department", "Age", "Attrition" FROM HR_Employee_Attrition_tab WHERE "TotalWorkingYears" > 15 ', conn)
df
dbname = "HR_Employee_Attrition.db"
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをpandasで読み出す。
df = pd.read_sql('SELECT * FROM HR_Employee_Attrition_tab', conn)
cur.close()
conn.close()
pd.options.display.max_rows = 220
columns = df.columns
columns = pd.DataFrame(columns)
columns.head(220)
df
df["Department"].value_counts()
dbname = "HR_Employee_Attrition.db"
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをpandasで読み出す。
df = pd.read_sql('SELECT * FROM HR_Employee_Attrition_tab', conn)
df
df["EducationField"].value_counts()
df = df[df["EducationField"]=="Life Sciences"]
df
dbname = "HR_Employee_Attrition.db"
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# dbをpandasで読み出す。
df = pd.read_sql('SELECT * FROM HR_Employee_Attrition_tab', conn)
df